version 16

* Hiring-manager survey: load the CSV, describe the four survey variables,
* and run paired comparisons between them.
clear
insheet using "Data/hiringmanagersurvey.csv", clear

* Descriptive statistics.
summarize noexp exp black white

* Paired t-tests (the "var1 == var2" form compares two variables row by row).
ttest noexp == exp
ttest black == white

* Row-wise gaps, followed by a paired test of the race gap against the
* experience gap.
generate diff_exp  = exp - noexp
generate diff_race = white - black
ttest diff_race == diff_exp

**********************************************
* Load the callback data and build the regressors used in the regressions below.
clear
insheet using "Data/callback.csv"

* Indicators built from string columns: a logical expression evaluates to
* 1 when true and 0 otherwise, so each line yields a 0/1 variable.
generate minority   = (race == "b")
generate experience = (exp == "TRUE")
* male is 1 for every value of gender other than "Female".
generate male       = (gender != "Female")

* One dummy per distinct value of location (city_dummy1, city_dummy2, ...).
tabulate location, generate(city_dummy)

generate interaction = minority * experience

* Mean of callback within each minority x experience cell.
tabulate minority experience, summarize(callback) means

***********
* TABLE I *
***********

* OLS regressions of callback on the minority and experience indicators,
* with standard errors clustered on url.
* The outcome is written out in full as callback (as in the rest of this
* file) rather than relying on variable-name abbreviation, which stops
* working under "set varabbrev off" or if another variable starting with
* "call" is ever added to the data.

* (1) Full sample: minority gap
regress callback minority, vce(cluster url)
* (2) Minority gap among observations with experience == 0
regress callback minority if experience == 0, vce(cluster url)
* (3) Minority gap among observations with experience == 1
regress callback minority if experience == 1, vce(cluster url)
* (4) Full sample: experience gap
regress callback experience, vce(cluster url)
* (5) Full sample: both indicators plus their interaction
regress callback minority experience interaction, vce(cluster url)

clear all
set more off

* ---------------------------------------------------------------------
* Inputs for the decomposition.
* NOTE(review): the B/W and 0/1 suffixes presumably index group and
* prior experience -- confirm against the accompanying paper.
* ---------------------------------------------------------------------
local n_max = 35

* Callback probabilities
local pB0_callback = 0.13
local pW0_callback = 0.26
local pB1_callback = 0.27
local pW1_callback = 0.31

* Conversion probabilities
local pB0_convert = 0.55
local pW0_convert = 0.55
local pB1_convert = 0.70
local pW1_convert = 0.70

* Offer probability = callback probability x conversion probability
foreach g in B0 W0 B1 W1 {
    local p`g'_offer = `p`g'_callback' * `p`g'_convert'
}

* One observation per value of n = 0, 1, ..., n_max
local n_obs = `n_max' + 1
set obs `n_obs'
generate n = _n - 1

* Probability of no offer in n independent draws at the "0" offer rate ...
foreach g in B0 W0 {
    generate p`g'_noHire = (1-`p`g'_offer')^n
}
* ... and its complement, the probability of at least one offer.
foreach g in B0 W0 {
    generate p`g'_AnyHire = 1 - p`g'_noHire
}

* total:    W mixture evaluated at W offer rates minus B mixture at B offer rates.
* systemic: same difference, but with B offer rates used in the W mixture too.
* direct:   the remainder.
generate total = pW0_noHire*`pW0_offer' + pW0_AnyHire*`pW1_offer' ///
    - pB0_noHire*`pB0_offer' - pB0_AnyHire*`pB1_offer'
generate systemic = pW0_noHire*`pB0_offer' + pW0_AnyHire*`pB1_offer' ///
    - pB0_noHire*`pB0_offer' - pB0_AnyHire*`pB1_offer'
generate direct = total - systemic

*************
* Figure VI *
*************

* Stacked area chart of the decomposition against n:
*   layer 1 (area)  : 0 to direct         -> "Direct Disc."
*   layer 2 (rarea) : direct to total     -> "Systemic Disc."
*   layer 3 (line)  : total               -> "Total Disc."
* The legend lists the layers in the order 3, 2, 1.
* Note: The vertical dashed lines indicating the 10th, 50th, and 90th
* percentiles, as well as the brackets, were added to this graph outside of
* Stata. The command therefore no longer references the vline and text
* macros: they are never defined in this do-file, so they expanded to
* nothing and only suggested annotations that this code does not draw.

graph twoway ///
       (area direct n, color(green%50)) ///
       (rarea direct total n, color(yellow%50)) ///
       (line total n, lcolor(blue) lwidth(medium)), ///
       xlabel(0(5)30) ///
       ylabel(0(0.02)0.08) ///
       xtitle("Number of Jobs") ///
       ytitle("Discrimination") ///
       legend(order(3 2 1) ///
              label(1 "Direct Disc.") ///
              label(2 "Systemic Disc.") ///
              label(3 "Total Disc.") ///
              rows(1)) ///
       title("Decomposition of Total Discrimination") ///
       scheme(s2color) ///
       graphregion(color(white))

* Export the graph as a 1000-pixel-wide PNG, overwriting any existing file.
graph export "Output/figureVI.png", replace width(1000)


*********************************************************************
* Display marginal means with clustered SEs - used later in Figure V*
*********************************************************************

* Start again from the raw callback data.
clear
insheet using "Data/callback.csv", clear

* 0/1 indicators built from the string columns.
generate race_cat = (race == "b")      // 1 when race is "b", 0 otherwise
generate exp_cat  = (exp == "TRUE")    // 1 when exp is "TRUE", 0 otherwise

* Fully interacted model of callback on race and experience, with standard
* errors clustered on url.
regress callback i.race_cat##i.exp_cat, vce(cluster url)

* Predicted mean callback for each race x experience cell; post stores the
* margins as the current estimation results.
margins race_cat#exp_cat, post

************************
* Table A.2 Regressions*
************************

* Convert the job-availability CSV to a Stata dataset so that it can serve
* as the using file in the merge below.
insheet using "Data/jobsavail.csv", clear
save "Data/temp_jobsavail.dta", replace

* Attach the job-availability variables to each callback observation by
* location_id (many callback rows per location); no _merge variable is kept.
insheet using "Data/callback.csv", clear
merge m:1 location_id using "Data/temp_jobsavail.dta", nogenerate

* Recode the TRUE/FALSE strings as "1"/"0" and convert exp to numeric.
replace exp = "1" if exp == "TRUE"
replace exp = "0" if exp == "FALSE"
destring exp, replace

generate jobs_per_thousand = sum/popu * 1000

* Both models interact a continuous regressor with the exp indicator and
* cluster standard errors on url.
* (1) jobs per thousand
regress callback c.jobs_per_thousand##i.exp, vce(cluster url)
* (2) raw sum
regress callback c.sum##i.exp, vce(cluster url)
